library("tidyverse")
library("topicmodels")
library("tidytext")
library("stopwords")
library("ggwordcloud")
library("ggthemes")
library("magrittr")
library("stringr")
library("cowplot")
library("ggrepel")

Loading Raw data

# Read the Yelp coffee-shop review data from yelp_coffeeshop.csv.
yelp.raw <- read_csv("yelp_coffeeshop.csv")

# Give every review an id taken from its row name.
yelp.raw[["review.id"]] <- row.names(yelp.raw)
yelp.raw

Cleaning

Removing check-in/check-ins, and number of check ins

# Remove a leading "<digits> check-in" marker from every review. The trailing
# `.` in the pattern also consumes the single character that follows it.
# str_replace() is vectorised, so the whole column is cleaned in one call
# (this also behaves correctly when the table has zero rows, unlike 1:nrow()).
yelp.raw$review_text <- str_replace(yelp.raw$review_text, "^[\\d]+ check-in.", "")

# Eyeball ten random reviews to check the cleanup.
yelp.raw %>% slice_sample(n = 10)

Tokenize

# Split each review into word tokens (one row per word occurrence) and keep
# only the columns needed downstream.
yelp.tidy <- select(
  unnest_tokens(yelp.raw, output = "word", input = "review_text", token = "words"),
  rating, word, review.id
)

head(yelp.tidy)

Stop Words Removed

# Count how often each word occurs in each review, after removing stop words.
# The earlier version first built the same table with stop words included and
# then overwrote it without using it; that unused pass has been dropped.
yelp_wc <- yelp.tidy %>%
  anti_join(get_stopwords(), by = "word") %>%
  count(rating, word, review.id)

head(yelp_wc)

TF-IDF

# Add tf, idf and tf_idf columns, treating every review as one document.
yelp_tf_idf.full <- bind_tf_idf(yelp_wc, term = word, document = review.id, n = n)
head(yelp_tf_idf.full)

topic model

# Density of tf_idf over all (review, word) pairs.
# after_stat(density) replaces the deprecated `..density..` notation.
yelp_tf_idf.full %>%
  ggplot(aes(x = tf_idf, y = after_stat(density))) +
  geom_density(fill = "firebrick", col = "firebrick", alpha = 0.5) +
  theme_gdocs()

Zooming in

# Same density, zoomed to tf_idf between 0.05 and 0.5. coord_cartesian() only
# changes the visible window; the density is still estimated from all rows.
# after_stat(density) replaces the deprecated `..density..` notation.
yelp_tf_idf.full %>%
  ggplot(aes(x = tf_idf, y = after_stat(density))) +
  geom_density(fill = "firebrick", col = "firebrick", alpha = 0.5) +
  coord_cartesian(xlim = c(0.05, 0.5)) +
  theme_gdocs()

Choosing cutoff

The cutoff is chosen by inspecting the word clouds/topics obtained after applying it.

# Build word clouds from the words whose TF-IDF exceeds a cutoff.
#
# Args:
#   data:      word-level table with at least the columns `rating`, `word`,
#              `n` and `tf_idf` (defaults to the global `yelp_tf_idf.full`).
#   cutoff:    rows whose `tf_idf` is not strictly greater than this are dropped.
#   stopwords: data frame with a `word` column; rows of `data` whose word
#              appears in it are removed.
#
# Returns a list with
#   cut.data:  `data` after the cutoff and stop-word filtering,
#   as.whole:  word cloud of the 15 most frequent remaining words overall,
#   by.rating: word cloud, faceted by rating, of the 10 words with the largest
#              share of each rating's word count.
cloud.by.tf_idf.cutoff <- function(data = yelp_tf_idf.full, cutoff, stopwords = get_stopwords()) {
  yelp_clean <- data %>%
    filter(tf_idf > cutoff) %>%
    anti_join(y = stopwords, by = "word")

  # Total word count per rating: the denominator of each word's share.
  grouped.sum <- yelp_clean %>%
    group_by(rating) %>%
    summarise(sum = sum(n))

  # `.groups = "drop_last"` deliberately leaves the result grouped by rating,
  # so slice_head() below keeps the top 10 words within each rating. Stating
  # it explicitly also silences the "`summarise()` has grouped output" message.
  top.by.rating <- yelp_clean %>%
    group_by(rating, word) %>%
    summarise(n = sum(n), .groups = "drop_last") %>%
    left_join(grouped.sum, by = "rating") %>%
    mutate(prop = n / sum) %>%
    arrange(desc(prop)) %>%
    slice_head(n = 10)

  by.rating <- ggplot(top.by.rating, aes(label = word, size = prop)) +
    suppressWarnings(geom_text_wordcloud()) +
    scale_size_area(max_size = 9) +
    facet_wrap(~ as.factor(rating)) +
    theme_minimal()

  # Word totals over all ratings, for the single overall cloud.
  yelp_wc.as.one <- yelp_clean %>%
    dplyr::select(word, n) %>%
    group_by(word) %>%
    summarise(n = sum(n))

  p <- yelp_wc.as.one %>%
    arrange(desc(n)) %>%
    slice_head(n = 15) %>%
    ggplot(aes(label = word, size = n, col = word)) +
    geom_text_wordcloud() +
    scale_size_area(max_size = 20) +
    theme_minimal()

  list(cut.data = yelp_clean,
       as.whole = p,
       by.rating = by.rating)
}

# Build the word clouds at three TF-IDF cutoffs: 0.1, 0.05 and 0.15.
first <- cloud.by.tf_idf.cutoff(cutoff = 0.1)
second <- cloud.by.tf_idf.cutoff(cutoff = 0.05)
third <- cloud.by.tf_idf.cutoff(cutoff = 0.15)

cutoff - 0.1

first$by.rating

first$as.whole

The result is good: most of the collections of words/topics make sense, and we can tell what the reviews are about.

exploring:

cutoff - 0.05

second$by.rating

second$as.whole

Not much information here aside from food and the adjectives used to describe the service.

cutoff - 0.15

third$by.rating

third$as.whole

This cutoff seems informative, but there is still good information at the 0.1 cutoff.

further cleaning (removing adjectives)

checking

# Extra words to drop on top of the default stop-word list.
adjectives <- c("amazing","great","love","awesome","best","always","foods","super","2","ok","okay","5")

# Append them to the default list; they are tagged with the "snowball" lexicon
# so the added rows have the same columns as the rows from get_stopwords().
yelp.stopwords <- rbind(
  get_stopwords(),
  data.frame(word = adjectives, lexicon = "snowball")
)

# Cutoff 0.1 again, this time with the extended stop-word list.
fourth <- cloud.by.tf_idf.cutoff(cutoff = 0.1, stopwords = yelp.stopwords)
## `summarise()` has grouped output by 'rating'. You can override using the `.groups` argument.
fourth$by.rating

fourth$as.whole

Somewhat similar to first; for rating 5, we can say that most of the reviews are about the product.

# Cutoff 0.15 with the extended stop-word list.
fifth <- cloud.by.tf_idf.cutoff(cutoff = 0.15, stopwords = yelp.stopwords)
## `summarise()` has grouped output by 'rating'. You can override using the `.groups` argument.
fifth$by.rating

fifth$as.whole

the result is comparable to previous cutoff (fourth)

Experimenting

At this point, using cutoff 0.10 and 0.15 gives informative wordcloud grouped by rating for both data with original stop words removed and for additional words removed.

So I am considering results:

Viewer function

For checking terms, in case we need to read the reviews that include an interesting term.

# Show keyword-in-context snippets from the raw reviews.
#
# Args:
#   word: text to look for. It is pasted into a regular expression, so regex
#         metacharacters in it are interpreted as such.
#
# Returns a character vector with at most 30 snippets, one per matching review
# in `yelp.raw`, each consisting of 40 characters of context, a space, the
# word, a space, and 40 more characters. Reviews where the word is not
# surrounded by spaces, or sits within 40 characters of either end of the
# text, produce no snippet. When nothing matches, an empty character vector is
# returned (the previous truncation logic indexed with `1:0` and returned NA).
viewer <- function(word) {
  # The spaces around the word are written out explicitly; the earlier
  # paste() call produced the same pattern through its default " " separator.
  pattern <- paste0(".{40} ", word, " .{40}")

  snippets <- str_extract(yelp.raw$review_text, pattern)
  snippets <- snippets[!is.na(snippets)]

  head(snippets, 30)
}

viewer("moon")[1:10] #example
##  [1] "whats in it, but man are the coffee with moon milk in them GOOD. Basically, Summer Moo"
##  [2] "elings for summer moon.  I tried out the moon menu with the moon milk that they use an"
##  [3] " I got the Winter Moon (iced coffee with moon milk and coffee). I got my coffee shortl"
##  [4] "ll splurge and add some of their special moon milk.  The moon milk is delicious but VE"
##  [5] "e and great drinks!  Order anything with moon in the title and you won't be disappoint"
##  [6] "o to is a Half Moon coffee, which is 1/2 moon milk and half regular milk. It is just t"
##  [7] " as most coffee. Seriously what's in the moon milk? I guess if we knew the secret, we "
##  [8] " but I wished I tried the regular summer moon first as I heard it's amazing. The pumpk"
##  [9] "For some reason their summer moon/winter moon tastes a bit different from the south lo"
## [10] "ard of Summer Moon.  The hype over their moon milk was enticing, and I was truly inter"

Calculation of perplexity for all (VEM)

# Held-out perplexity of VEM-fitted LDA models, for every candidate data set
# and every candidate number of topics.
list.data <- list(first = first,
                  third = third,
                  fourth = fourth,
                  fifth = fifth)
k.values <- c(10, 15, 20, 25, 30)
perplex.data <- data.frame(perplexity = rep(NA_real_, length(list.data) * length(k.values)),
                           data = rep(names(list.data), each = length(k.values)),
                           k.value = rep(k.values, times = length(list.data)))

# One slot per data set, filled inside the loop and flattened afterwards
# (avoids growing a vector with c() on every iteration).
perplex.by.data <- vector("list", length(list.data))

# Iterate by index: the loop variable used to be called `list`, which masked
# base::list for the remainder of the session.
for (j in seq_along(list.data)) {
  cleaned.data <- list.data[[j]]$cut.data

  # Seeded so the 500 held-out reviews are reproducible. This call also makes
  # any earlier set.seed() irrelevant, so none is issued before the loop.
  set.seed(10)
  sampled_test <- sample(unique(cleaned.data$review.id), size = 500, replace = FALSE)
  yelp_test <- cleaned.data %>%
    filter(review.id %in% sampled_test)
  yelp_train <- cleaned.data %>%
    filter(!(review.id %in% sampled_test))

  ## DTM (document-term-matrix)
  yelp_train_dtm <- yelp_train %>%
    cast_dtm(document = review.id, term = word, value = n)
  yelp_test_dtm <- yelp_test %>%
    cast_dtm(document = review.id, term = word, value = n)

  ## Tuning hyperparameters
  # Fit LDA (VEM) at each candidate k; alpha = 0.50 is passed via `control`.
  list_lda_models <- lapply(
    X = k.values,
    FUN = function(k) LDA(x = yelp_train_dtm, method = "VEM", k = k,
                          control = list(alpha = 0.50, seed = 3201))
  )

  # Perplexity of each fitted model on the held-out document-term matrix.
  perplex.by.data[[j]] <- vapply(list_lda_models,
                                 function(m) perplexity(m, yelp_test_dtm),
                                 numeric(1))
}

# Flattened in data-set order, matching the row layout of perplex.data.
perplex.vector <- unlist(perplex.by.data)
perplex.data$perplexity <- perplex.vector

# Perplexity against number of topics, one line per data set.
ggplot(perplex.data, aes(x = k.value, y = perplexity, col = data)) +
  geom_line() +
  labs(title = "Number of Topics vs Perplexity", x = "topics", y = "perplexity") +
  theme_gdocs()

for “first” data, we can use k = 20

for “third” data, k = 30

for “fourth” data, k = 20 or 30

for “fifth” data, k = 30

Fitting a model

# Number of topics for each of the five VEM fits.
params <- c(20, 30, 20, 30, 30) # first, third, fourth, fourth, fifth
model.store <- list()

for (i in seq_along(params)) {
  # list.data has four entries but "fourth" (entry 3) is fitted twice, so fits
  # 4 and 5 read entries 3 and 4 respectively.
  cleaned.data <- list.data[[if (i >= 4) i - 1 else i]]$cut.data

  # Seeded hold-out of 500 reviews; the model is fitted on the remainder.
  set.seed(10)
  sampled_test <- sample(unique(cleaned.data$review.id), size = 500, replace = FALSE)
  yelp_test <- filter(cleaned.data, review.id %in% sampled_test)
  yelp_train <- filter(cleaned.data, !(review.id %in% sampled_test))

  ## DTM (document-term-matrix)
  yelp_train_dtm <- cast_dtm(yelp_train, document = review.id, term = word, value = n)
  yelp_test_dtm <- cast_dtm(yelp_test, document = review.id, term = word, value = n)

  sample_lda <- LDA(
    x = yelp_train_dtm, k = params[i], method = "VEM",
    control = list(alpha = 0.50, seed = 3201)
  )
  model.store[[i]] <- sample_lda
}

Describing Topics

By top 3 topic

Each document has a probability for each topic. For each topic, gamma is summed over all documents to give theta. Based on theta, the top 3 topics are picked.

The word cloud for each topic is based on beta - which is the probability of word in that particular topic.

This way, it is easier to interpret each model

# Word clouds for the three topics of an LDA model with the largest theta.
#
# Args:
#   model: a fitted topic model accepted by tidy(model, matrix = "beta") and
#          tidy(model, matrix = "gamma").
#
# Returns a list with
#   top.3: factor holding the ids of the three topics with the largest theta
#          (gamma summed over all documents), largest first,
#   cloud: word cloud, faceted by topic, of the 25 highest-beta terms of each
#          of those topics.
#
# Side effect: calls set.seed(10), which resets the global RNG state.
top3.wc <- function(model) {
  topics_beta <- tidy(model, matrix = "beta")

  # theta per topic, for every topic the model actually has. (This used to be
  # a loop over a hard-coded 1:30 that rbind()-ed one row at a time, so any
  # topic numbered above 30 was silently ignored.)
  theta.all <- model %>%
    tidy(matrix = "gamma") %>%
    group_by(topic) %>%
    summarise(theta = sum(gamma)) %>%
    as.data.frame()
  theta.all$topic <- as.factor(theta.all$topic)

  top.3 <- theta.all %>%
    arrange(desc(theta)) %>%
    slice_head(n = 3)
  # Compare topic ids as text so the factor in top.3 matches the integer
  # topic column of topics_beta by label rather than by level code.
  top.3.ids <- as.character(top.3$topic)

  set.seed(10)
  cloud <- topics_beta %>%
    filter(as.character(topic) %in% top.3.ids) %>%
    mutate(topic = str_c("topic ", topic)) %>%
    group_by(topic) %>%
    top_n(n = 25, wt = beta) %>%
    mutate(term = fct_reorder(term, .x = beta, .fun = mean)) %>%
    ggplot(aes(label = term, size = beta)) +
    geom_text_wordcloud() +
    scale_size_area(max_size = 8) +
    theme_minimal() +
    # NOTE(review): as_labeller() is given a plain integer vector here; kept
    # unchanged - confirm it has the intended effect on the facet labels.
    facet_wrap(~ topic,
               labeller = as_labeller(1:4))

  # top.3 is a plain data.frame, so `[, 1]` drops to the topic factor itself.
  list(top.3 = top.3[, 1],
       cloud = cloud)
}

model from “first” data using k = 20

top3.wc(model.store[[1]])$cloud

model from “third” data using k = 30

top3.wc(model.store[[2]])$cloud

model from “fourth” data using k = 20

top3.wc(model.store[[3]])$cloud

model from “fourth” data using k = 30

top3.wc(model.store[[4]])$cloud

viewer("work")[1:10]
##  [1] "me here if you're looking for a place to work remotely! There is no WiFi and no intern"
##  [2] "c vibe. I dropped in to catch up on some work and found a fantastic vibe and a great s"
##  [3] "l place to just hang out with friends or work on homework. You'll probably see the vib"
##  [4] "--no internet...not really a place to do work or study, but great place to chill! My f"
##  [5] "rything else Sophie touch! Can't wait to work with you again, Sophie! I wish I could h"
##  [6] " Party Company was absolutely amazing to work with. She puts in a huge amount of time "
##  [7] "She is so professional and delightful to work with. Her inventory of  vibe is so beaut"
##  [8] "he patrons keep to themselves while they work on laptops, read, etc.  Always a pleasur"
##  [9] "nd seating if you are trying to get some work done/study. I got the lavender lemonade "
## [10] "king can be a hassle even during weekday work hours when most vibe would be working. T"
viewer("french")[1:10]
##  [1] "food, alcohol, whiskey, tea, laughs, and french gypsy jazz vibe accompanied our afternoo"  
##  [2] "ecial by Austin standards, no pour over, french press, or other small batch coffee metho"  
##  [3] "even bought some and used it when making french food and it was delicious!! The service "  
##  [4] "avor of some kind. I've also heard their french press is great, but haven't tried it yet"  
##  [5] "ffee may be very particular. I like dark french roast but when I tried the \"regular\" cof"
##  [6] "p (side note: the coffee is all done via french press, which is cool).  The vibe visitin"  
##  [7] "ff I am a coffee maniac. I coffee my own french press cup one at a time at home and hand"  
##  [8] "avor of some kind. I've also heard their french press is great, but haven't tried it yet"  
##  [9] "ffee may be very particular. I like dark french roast but when I tried the \"regular\" cof"
## [10] "e array of coffee makers, from chemex to french press to coffee coffee makers. It's a ve"

model from “fifth” data using k = 30

top3.wc(model.store[[5]])$cloud

Further Exploring fourth data using k = 25

  # VEM fit of the "fourth" data set with k = 25.
  cleaned.data <- fourth$cut.data

  # Seeded hold-out of 500 reviews; the model is fitted on the remainder.
  set.seed(10)
  sampled_test <- sample(unique(cleaned.data$review.id), size = 500, replace = FALSE)
  yelp_test <- filter(cleaned.data, review.id %in% sampled_test)
  yelp_train <- filter(cleaned.data, !(review.id %in% sampled_test))

  ## DTM (document-term-matrix)
  yelp_train_dtm <- cast_dtm(yelp_train, document = review.id, term = word, value = n)
  yelp_test_dtm <- cast_dtm(yelp_test, document = review.id, term = word, value = n)
  fourth.k25.model <- LDA(
    x = yelp_train_dtm, k = 25, method = "VEM",
    control = list(alpha = 0.50, seed = 3201)
  )

top3.wc(fourth.k25.model)$cloud

Further Exploring first data using k = 10 checking out interpretability

  # VEM fit of the "first" data set with k = 10.
  # This section previously read fourth$cut.data even though both its heading
  # and the model name refer to the "first" data set.
  cleaned.data <- first$cut.data

  # Seeded hold-out of 500 reviews; the model is fitted on the remainder.
  set.seed(10)
  sampled_test <- sample(unique(cleaned.data$review.id), size = 500, replace = FALSE)
  yelp_test <- cleaned.data %>%
    filter(review.id %in% sampled_test)
  yelp_train <- cleaned.data %>%
    filter(!(review.id %in% sampled_test))

  ## DTM (document-term-matrix)
  yelp_train_dtm <- yelp_train %>%
    cast_dtm(document = review.id, term = word, value = n)
  yelp_test_dtm <- yelp_test %>%
    cast_dtm(document = review.id, term = word, value = n)
  first.k10.model <- LDA(x = yelp_train_dtm, k = 10, method = "VEM",
                         control = list(alpha = 0.50, seed = 3201))

top3.wc(first.k10.model)$cloud

Further Exploring first data using k = 25

  # VEM fit of the "first" data set with k = 25.
  # This section previously read fourth$cut.data, which made it an exact
  # repeat of the fourth / k = 25 fit despite its heading and model name.
  cleaned.data <- first$cut.data

  # Seeded hold-out of 500 reviews; the model is fitted on the remainder.
  set.seed(10)
  sampled_test <- sample(unique(cleaned.data$review.id), size = 500, replace = FALSE)
  yelp_test <- cleaned.data %>%
    filter(review.id %in% sampled_test)
  yelp_train <- cleaned.data %>%
    filter(!(review.id %in% sampled_test))

  ## DTM (document-term-matrix)
  yelp_train_dtm <- yelp_train %>%
    cast_dtm(document = review.id, term = word, value = n)
  yelp_test_dtm <- yelp_test %>%
    cast_dtm(document = review.id, term = word, value = n)
  first.k25.model <- LDA(x = yelp_train_dtm, k = 25, method = "VEM",
                         control = list(alpha = 0.50, seed = 3201))

top3.wc(first.k25.model)$cloud

From the VEM models, the combinations of terms don’t seem logical; the terms do not fit together.

Exploring Gibbs method for LDA

Calculation of perplexity for all (Gibbs)

# Held-out perplexity of Gibbs-fitted LDA models, for every candidate data set
# and every candidate number of topics.
list.data2 <- list(first = first,
                   third = third,
                   fourth = fourth,
                   fifth = fifth)
k.values2 <- c(10, 15, 20, 25, 30)
perplex.data2 <- data.frame(perplexity = rep(NA_real_, length(list.data2) * length(k.values2)),
                            data = rep(names(list.data2), each = length(k.values2)),
                            k.value = rep(k.values2, times = length(list.data2)))

# One slot per data set, filled inside the loop and flattened afterwards
# (avoids growing a vector with c() on every iteration).
perplex.by.data2 <- vector("list", length(list.data2))

# Iterate over list.data2, the list built just above (the loop used to read
# the VEM section's list.data), and by index so that the loop variable no
# longer masks base::list.
for (j in seq_along(list.data2)) {
  cleaned.data <- list.data2[[j]]$cut.data

  # Seeded so the 500 held-out reviews are reproducible. This call also makes
  # any earlier set.seed() irrelevant, so none is issued before the loop.
  set.seed(10)
  sampled_test <- sample(unique(cleaned.data$review.id), size = 500, replace = FALSE)
  yelp_test <- cleaned.data %>%
    filter(review.id %in% sampled_test)
  yelp_train <- cleaned.data %>%
    filter(!(review.id %in% sampled_test))

  ## DTM (document-term-matrix)
  yelp_train_dtm <- yelp_train %>%
    cast_dtm(document = review.id, term = word, value = n)
  yelp_test_dtm <- yelp_test %>%
    cast_dtm(document = review.id, term = word, value = n)

  ## Tuning hyperparameters
  # Fit LDA (Gibbs) at each candidate k; alpha = 0.50 is passed via `control`.
  list_lda_models <- lapply(
    X = k.values2,
    FUN = function(k) LDA(x = yelp_train_dtm, method = "Gibbs", k = k,
                          control = list(alpha = 0.50, seed = 10))
  )

  # Perplexity of each fitted model on the held-out document-term matrix.
  perplex.by.data2[[j]] <- vapply(list_lda_models,
                                  function(m) perplexity(m, yelp_test_dtm),
                                  numeric(1))
}

# Flattened in data-set order, matching the row layout of perplex.data2.
perplex.vector2 <- unlist(perplex.by.data2)
perplex.data2$perplexity <- perplex.vector2

# Perplexity against number of topics for the Gibbs fits, one line per data set.
ggplot(perplex.data2, aes(x = k.value, y = perplexity, col = data)) +
  geom_line() +
  labs(title = "Number of Topics vs Perplexity", x = "topics", y = "perplexity") +
  theme_gdocs()

for “first” data, we can use k = 30

for “third” data, k = 20

for “fourth” data, k = 25

for “fifth” data, k = 20

Fitting a model

# Number of topics for each Gibbs fit, in the order of list.data2.
params <- c(30, 20, 25, 30) # first, third, fourth, fifth
model.store2 <- list()

for (i in seq_along(params)) {
  cleaned.data <- list.data2[[i]]$cut.data

  # Seeded hold-out of 500 reviews; the model is fitted on the remainder.
  set.seed(10)
  sampled_test <- sample(unique(cleaned.data$review.id), size = 500, replace = FALSE)
  yelp_test <- filter(cleaned.data, review.id %in% sampled_test)
  yelp_train <- filter(cleaned.data, !(review.id %in% sampled_test))

  ## DTM (document-term-matrix)
  yelp_train_dtm <- cast_dtm(yelp_train, document = review.id, term = word, value = n)
  yelp_test_dtm <- cast_dtm(yelp_test, document = review.id, term = word, value = n)

  sample_lda <- LDA(
    x = yelp_train_dtm, k = params[i], method = "Gibbs",
    control = list(alpha = 0.50, seed = 10)
  )
  model.store2[[i]] <- sample_lda
}

Describing Topics

By top 3 topic

model from “first” data using gibbs method, k = 30

top3.wc(model.store2[[1]])$cloud

model from “third” data using gibbs method, k = 20

top3.wc(model.store2[[2]])$cloud

model from “fourth” data using gibbs method, k = 25

top3.wc(model.store2[[3]])$cloud

model from “fifth” data using gibbs method, k = 30

top3.wc(model.store2[[4]])$cloud

viewer("times")[1:10]
##  [1] "tle cafe! I've driven by this place many times and hadn't noticed it until today. Glad "
##  [2] " good but not great.  Worst of all, both times I went in there it smelled badly like a "
##  [3] " and good noise level. It can be vibe at times and slightly noisy at other times. I lik"
##  [4] "parkings in total. Also, during the peak times in the afternoon, snagging a comfortable"
##  [5] " 4 stars when I've gone back a couple of times to make sure poor/erratic service isn't "
##  [6] "o to coffee shop. I stop here at least 5 times a week before heading into work. Super f"
##  [7] "laid-back vibe. Unfortunately, all three times I've gone there to work, it's been way t"
##  [8] "is a hit-or-miss place for me. There are times when I come and get the best service and"
##  [9] "isty here, which is ok sometimes.  Other times it can feel a bit vibeed here.  The tea "
## [10] "ter. I live in Austin so I know the best times to come (when there aren't as many touri"

Further Exploring fourth data using k = 20

  # Gibbs fit of the "fourth" data set with k = 20.
  cleaned.data <- fourth$cut.data

  # Seeded hold-out of 500 reviews; the model is fitted on the remainder.
  set.seed(10)
  sampled_test <- sample(unique(cleaned.data$review.id), size = 500, replace = FALSE)
  yelp_test <- filter(cleaned.data, review.id %in% sampled_test)
  yelp_train <- filter(cleaned.data, !(review.id %in% sampled_test))

  ## DTM (document-term-matrix)
  yelp_train_dtm <- cast_dtm(yelp_train, document = review.id, term = word, value = n)
  yelp_test_dtm <- cast_dtm(yelp_test, document = review.id, term = word, value = n)

  fourth.k20.model.gibbs <- LDA(
    x = yelp_train_dtm, k = 20, method = "Gibbs",
    control = list(alpha = 0.50, seed = 10)
  )
top3.wc(fourth.k20.model.gibbs)$cloud

among these models, fifth data with k = 30 is the most coherent.

topic 7 - is about customer experience, amenities and facilities

topic 25 - is about customer service experience, either good or bad

topic 29 - is about wide selection of food and drinks

Info about the chosen model including sample review

Categorization: each document is categorized by the topic with the highest probability in that document (gamma). Therefore, the data frame below shows each document and the topic it most likely belongs to.

# For every document of the chosen model (Gibbs fit on the "fifth" data), keep
# only the row of its highest-gamma topic. The result stays grouped by document.
d.frame2 <- model.store2[[4]] %>%
  tidy(matrix = "gamma") %>%
  mutate(document = as.double(document)) %>%
  group_by(document) %>%
  arrange(desc(gamma)) %>%
  slice_head(n = 1)
d.frame2

Visualizing probability of each document in their respective topic

# One point per document at the gamma of its assigned topic; within each topic
# the document with the highest gamma gets a label.
ggplot(d.frame2, aes(x = as.factor(topic), y = gamma, label = document)) +
  geom_point(aes(col = as.factor(topic))) +
  geom_label_repel(
    data = slice_head(arrange(group_by(d.frame2, topic), desc(gamma)), n = 1),
    box.padding = 0.1,
    point.padding = 0.5,
    segment.color = 'grey50',
    segment.size = 0.4,
    direction = "y",
    nudge_y = 0.2
  ) +
  ylim(c(0, 0.8)) +
  theme(legend.position = "none") +
  labs(
    x = "topics",
    title = "Probability of each document in their respective topic",
    subtitle = "The document number above each topic best describes that topic based on probability"
  )

So for the chosen model,

topic 7 - review.id 1063 topic 25 - review.id 6261 topic 29 - review.id 1153

fetching the review

# Print the raw text of the review picked for each of the three topics;
# fetch.docs[i] is the review id that goes with topics[i].
fetch.docs <- c(1063, 6261, 1153)
topics <- c(7, 25, 29)
for (i in seq_along(fetch.docs)) {
  print(paste("topic", topics[i]))
  print(as.matrix(select(filter(yelp.raw, review.id == fetch.docs[i]), review_text)))
}
## [1] "topic 7"
##      review_text                                                                                                                                                                                                  
## [1,] "I've been coming here for a few years now. They have a great selection of coffee and tea. It's pretty mellow in here so it's ideal for working or studying. services are pretty nice overall, hard to beat."
## [1] "topic 25"
##      review_text                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                            
## [1,] " Listed in The Amy D List - \"Royalty\", OMG! Another Trailer Food LIst..., \"Nuptial Coffee Bliss!\" The Power of Yelp & social media: I had just checked-in to Micklethwait to visit the #AustinBakes display, when I get a text from Amy D to check out Sister Coffee in the same lot. I wouldn't have known it was there if Amy hadn't mentioned it. It truly is the tiniest trailer in Texas.  But putting out some mighty big coffee. (Oh jeez, you did see that coming, didn't you?) It apparently is two sisters working alternate schedules, I had a fun convo about roasters with Saturday Katie (hope I got her name right), and she made me a hot black pour-over using my cup. I didn't notice the cost or origin, but it was good stuff.  Know your Oreos? Running a trailer has got to be tough, but running a coffee trailer has got to be Double-Stuff tough. Especially in a Starbucks Universe. If this were my hood, yeah, I'd be hanging out here. When I'm around, I'll definitely pop by. Wish I had a sister."
## [1] "topic 29"
##      review_text                                                                                                                                                                                                                      
## [1,] "First time checking this place out first time. Pretty swag. Nice assortment of drinks. I got the 15 minutes of fame. Also got the poptart. Delicious!  Next time I'll try the Marymount Monroe. Will definitely come back soon."
# Summarise a fitted topic model: which documents fall in its top-3 topics,
# how the modelled documents are distributed over ratings, and a sampler for
# example reviews.
#
# Args:
#   model: a fitted topic model accepted by tidy(model, matrix = "gamma") and
#          by top3.wc().
#
# Returns a list with
#   top3.dframe: one row per document assigned to one of the top-3 topics
#                (document, topic, gamma, rating),
#   gplot:       bar chart of document counts by rating, annotated with the
#                number and share (relative to nrow(yelp.raw)) of documents in
#                the top-3 topics and in the model,
#   sample.text: function that prints two randomly drawn reviews per top-3
#                topic; call set.seed() beforehand for reproducible draws.
#
# Reads the global `yelp.raw` for ratings and review text.
info.model <- function(model) {
  # Most likely topic per document, derived from `model` itself. (This used to
  # read the global `d.frame2`, so the result only matched `model` when that
  # global happened to be built from the same model.)
  doc.topics <- model %>%
    tidy(matrix = "gamma") %>%
    mutate(document = as.double(document)) %>%
    group_by(document) %>%
    arrange(desc(gamma)) %>%
    slice_head(n = 1) %>%
    ungroup()

  ratings <- yelp.raw %>%
    transmute(document = as.double(review.id), rating = rating)

  # Explicit join key: no "Joining, by = ..." message.
  d.frame <- left_join(doc.topics, ratings, by = "document")

  # top3.wc() returns a factor; go through as.character() so the topic labels
  # are recovered rather than the factor's internal level codes.
  top3 <- as.numeric(as.character(top3.wc(model)$top.3))
  top3.dframe <- d.frame %>% filter(topic %in% top3) # documents in the top-3 topics only

  # annotate() draws each label once; a geom_label() with constant aesthetics
  # would draw one copy per row of d.frame. The x axis shows rating, so it is
  # labelled accordingly (it was previously labelled "count").
  gplot <- ggplot(d.frame) +
    geom_bar(aes(x = rating)) +
    labs(title = "Frequency of document by rating from this model") +
    xlab("rating") +
    annotate("label", x = 2, y = 3000,
             label = paste("total documents in top3: ",
                           nrow(top3.dframe),
                           " = ", (100 * nrow(top3.dframe) / nrow(yelp.raw)) %>% round(2), "%")) +
    annotate("label", x = 2, y = 2500,
             label = paste("total documents in model: ",
                           nrow(d.frame),
                           " = ", (100 * nrow(d.frame) / nrow(yelp.raw)) %>% round(2), "%"))

  # Prints two random reviews for each top-3 topic.
  sample.text <- function() {
    transcript <- top3.dframe %>%
      group_by(topic) %>%
      slice_sample(n = 2) %>%
      ungroup() %>%
      select(topic, document) %>%
      left_join(yelp.raw %>% transmute(document = as.double(review.id), review_text = review_text),
                by = "document")

    for (i in unique(transcript$topic)) {
      print(paste("topic", i))
      filter(transcript, topic == i) %>% select(review_text) %>% as.list %>% print()
    }
  }

  list(top3.dframe = top3.dframe,
       gplot = gplot,
       sample.text = sample.text)
}

# Summarise the model fitted on the "fifth" data with k = 30 (method = gibbs),
# stored as the fourth entry of model.store2, then show its rating bar chart.
fifth.k30.info <- info.model(model.store2[[4]])
## Joining, by = "document"
fifth.k30.info[["gplot"]]

Sample review_text from the top-3 topics of the model fitted on the “fifth” data with k = 30

Finding the most representative reviews for each topic

# Seed the RNG so the random draw inside sample.text() is repeatable, then
# print two sampled reviews for each of the top-3 topics.
set.seed(seed = 215)
fifth.k30.info[["sample.text"]]()
## [1] "topic 7"
## $review_text
## [1] "The coffee is incredible and the service is cheery and engaging  even at 6:00AM !)"                                                                                          
## [2] "Waiting times are huge if you go during the weekend or at prime time. However this is understandable, the vibe is great and the service is even better. The food is amazing."
## 
## [1] "topic 25"
## $review_text
## [1] "I've finally found my Austin coffee shop! Fun comfy seating, delicious food, delectable drinks and friendly helpful service all at great price."                                                                                                                      
## [2] "I REALLY like the vibe at this place. Really wooden, rustic, communal, homey. They had a fire going, seating everywhere, pecan pumpkin whiskey and live vibe. I can definitely see myself hanging out here more often! The BBQ my friends got looked really good too!"
## 
## [1] "topic 29"
## $review_text
## [1] " This lil' local coffee spot is about 20 feet from my doorstep. I cannot tell you how many countless times DJ's has saved me.  Their coffee is amazing and the service there just delightful! They have food foods daily from foodDeli (score!) and the best part? Every time you buy a cup of coffee from these local dudes, it benefits kids back in the Dominican Republic (where the coffee is purchased by a local farming community) by putting that money towards education. It's like the TOMS shoe model, only, it's education based.  They have fantastic food products like banana food (my favorite) as well as bagels and smoothies. No complaints on this place.  It's local. It's local. And most of all, lovable."                                                                                                                                                                                                                                                                                                                                                                                                                                 
## [2] "Listed in \"Nuptial Coffee Bliss!\", \"Editor's Note...\" I MISSED the ELITE event but INVENTED the GAME so I STORMED in here with \"I'M A HOT-SHIT YELPER!\" \"I AM CONSUMER GOD!\" \"I WILL MAKE or BREAK YOU!\" \"GIMMEE LATTES SCONES MUFFINS BEER WINE!\" \"I don't EVEN LIKE alcohol or alcohol!\" GIMMEE GIMMEE GIMMEE! LOCAL STUFF BABY! FIVE-STARS FOR MAMA! NOM NOM NOM!  ************************  (Editor's note: It's difficult sometimes to discern Errol M's moods after his Botox treatments, which is when we received this review. We do know he's very polite, consumes only straight black coffee in coffeehouses, and doesn't touch parkingbs in the afternoon. He also gave the original Patika coffee trailer on Congress five-stars after having their coffee. The new South Lamar brick & mortar offers fast WiFi, seating, alcohol & alcohol, and sweet & savory food. We'll go ahead and let this review stand, since he posted a corroborating #errolsmug photo of his straight black coffee there.) Hip & cool, bright & contemporary, friendly & efficient. Good coffee, fast internet, alcohol & alcohol. I'd parking on Kinney St."
# Seed the RNG so the random draw inside sample.text() is repeatable, then
# print two sampled reviews for each of the top-3 topics.
set.seed(seed = 861)
fifth.k30.info[["sample.text"]]()
## [1] "topic 7"
## $review_text
## [1] "Three of my favorite things in life: coffee, cocktails, and food. Great location too. Visited a few times so far, first time had food, other times had drinks, coffee, and s'mores on the last date. Only comes with one or two services so would have been down for a double order. Because of the night time buzz vibe seem very social even in the day time for a cafe when you want to just be working with internet. Not the silent-whisper to your friend kind of cafe. Good times."
## [2] "My favorite for outdoor seating at a coffee shop. Some shade, internet, free internet, vibe, chill. If their seating could have cushions, I would work here everyday.  service are very nice and incredibly pleasant all around.  The coffee is too bad price for what you actually get,but Ill keep coming back for the vibe."                                                                                                                                                           
## 
## [1] "topic 25"
## $review_text
## [1] "The service is nice. I tried a coffee from here, but it was pretty bland. I will give this place another try, but for now, I wouldn't recommend it."              
## [2] "I like to bring clients here. The vibe is fantastic! The service is always very friendly. I highly recommend this place. Great place to get work done. Delicious!"
## 
## [1] "topic 29"
## $review_text
## [1] "They don't actually have cats here, bummer...but the vibe is super cute.  This is a comprehensive coffee/sweet tooth spot! They offer gf ice cream cones (sugar and regular cones) and a variety of gf foods.  I got the gf chocolate chip and almond milk chai. The chai takes a few minutes to make since they coffee to order.  It was pretty good but was a little heavy on the cinnamon and ginger side for my liking. The gf food though!  Yummy!  Just the right amount of chocolate chips and chewy, my fav quality. Will def come back and try their cold Almond milk chai and gf chocolate chip food."
## [2] "Modest coffee shop with delicious coffee. I was very impressed with the bold, woody flavor. They also serve food foods from food Deli. (I think it was food Deli)"
# Seed the RNG so the random draw inside sample.text() is repeatable, then
# print two sampled reviews for each of the top-3 topics.
set.seed(seed = 330)
fifth.k30.info[["sample.text"]]()
## [1] "topic 7"
## $review_text
## [1] "Not an ideal place for studying. I couldn't help but to first notice the vibe, which was not only super loud but bad too... Just really bad vibe. And the seatings are very small.  My coffee was mediocre as well.  Oh and customer service wasn't good either... The guy with the eye-glasses had an attitude. Or he was just parkingeless."        
## [2] "Excellent litlte roaster and blender.  I drank their Jakarta Blend for the duration of my month in Austin.  However, if you are looking for a place to get a cup of coffee and dig in for a while, this is really not it--Anderson's is much more a coffee purveyor than a coffee shop, though they do have two blends available by the cup each day."
## 
## [1] "topic 25"
## $review_text
## [1] "Deeeeeelicious coffee (and other stuff but I've only had the coffee based drinks) out of a trailer in a parking lot. There are lots of trees around and the street Flitch is on feels a lot older than a lot of other parts of town. I've only ever gotten my coffee to go but there is a little tiny space to sit inside the trailer, and lots of outdoor seating. Plenty of off street parking and some parking in the little lot they're in, too. They have foods from Tyson's foods and some food goods, too.  If I enjoyed hanging out at coffee establishments I'd probably hang out here."
## [2] "Best coffee bar I've found in Austin. Nice service, great coffee and food. Dedicated vibe seating, nice seating. internet has always functioned well for me."                                                                                                                                                                                                                                                                                                                                                                                                                                    
## 
## [1] "topic 29"
## $review_text
## [1] "Modest coffee shop with delicious coffee. I was very impressed with the bold, woody flavor. They also serve food foods from food Deli. (I think it was food Deli)"
## [2] "-Great [cheap and fast] food burritos -Great alcohol selection -Pleasant vibe  Check it out if you haven't"
# Seed the RNG so the random draw inside sample.text() is repeatable, then
# print two sampled reviews for each of the top-3 topics.
set.seed(seed = 1774)
fifth.k30.info[["sample.text"]]()
## [1] "topic 7"
## $review_text
## [1] "Great 24-hour coffee shop with a full menu of healthy options. Skip the foods, they are from Costco... Friendly service! The internet is solid.  The cat is out of the bag now - it was packed to the brim the other night with students studying."                                                                                                                                                                   
## [2] "One star away because of the drinks. I have much better milk tea from everywhere else.  4 stars because of everything else here. The vibe, the service, the cleanliness, the modernization, the vibe, etc - everything made it a very pleasant visit for just a friend gathering or a study place. It's very cute. Love the store's layout. So-so jasmine milk tea. The boba itself was great. Very inspirational. :)"
## 
## [1] "topic 25"
## $review_text
## [1] "Excellent coffee in an adorable little wooden trailer. Enjoy while you wait for Franklins. And if you're from the Bay Area or Santa Cruz, you'll be pleased to know the vibe who sources their beans worked previously for Verve. They know what they're doing!  We ordered coffee and a pumpkin scone. Both highly recommended."
## [2] "Nice chill vibe and good strong toddy. Nice place to take a break from the heat. :)"                                                                                                                                                                                                                                             
## 
## [1] "topic 29"
## $review_text
## [1] "Only place my partner and I go to buy our coffee.  Their Italian coffee is delectable.  The smell and the taste are beyond any other coffee I have tried.  Great service, and I highly recommend this place for buying your coffee.  It is not a sit down place, but Russells next door service Anderson coffee :)"
## [2] "+Great coffee -Drip coffee was okay +Lots of non dairy milk choices -Very few gluten free options"
# Seed the RNG so the random draw inside sample.text() is repeatable, then
# print two sampled reviews for each of the top-3 topics.
set.seed(seed = 1784)
fifth.k30.info[["sample.text"]]()
## [1] "topic 7"
## $review_text
## [1] "Listed in Coffee Shop Working One of my favorite places to work. Great vibe, really friendly service."                                                                                                                                                                                                                                                                                                                                                                                                            
## [2] "Their new location is on east 6th & I was super eager to try this new spot. They were only 10 days old when I stopped in so things might have changed since then (eek - took me a hot minute to write my review).  The service was great and my drink was delicious... but I was a bit bummed they didn't have WiFi. I love their intention to have patrons connect more vs getting on the internet, but I love coming to great coffee shops (which this is) and getting work done. Yummy coffee with almond milk"
## 
## [1] "topic 25"
## $review_text
## [1] "Really nice coffee shop on South 1st in Austin. The service are friendly and the place is kept quite immaculate. I ordered a coffee to keep things simple, but they have a special drink over there called the Gibraltar, which is apparently similar to a coffee. My coffee was quite good - not the best I've ever had, but the coffee was good and the beverage as a whole was definitely made by an expert.  A vibeal gripe perhaps, but most of their seating is quite communal (ie they have a couple of really large seatings), which makes it awkward for vibe who come alone and just want to sit at their own vibeal seating. Empty mug of coffee"
## [2] "Great coffee - had a couple of well needed quality coffee after several bad coffee elsewhere.  internet was a little sketchy today but they more than made up for it with a cracking 80/90s playlist that oozed good vibe all afternoon."                                                                                                                                                                                                                                                                                                                                                                                                                   
## 
## [1] "topic 29"
## $review_text
## [1] "It's a lovely little tea and food shop, but the service was continuously more and more disappointing as food progressed. I hate writing negative service reviews especially since the food was so fresh and they have lots of GLUTEN FREE food options, but the service was AWFUL. They always have one GF scone, parkingrot food food, and a GF food with other various daily specials which was very exciting for me being GF. The scone and food that I tried where delicious!"
## [2] " My Monday morning stop for fresh roasted beans for work. The coffee club parkingd where you buy 12 pounds and get one free is why I buy here and not from another place I pass on the way to work. Plus you gotta love a place that remembers your name and your regular order every time."
# Seed the RNG so the random draw inside sample.text() is repeatable, then
# print two sampled reviews for each of the top-3 topics.
set.seed(seed = 562)
fifth.k30.info[["sample.text"]]()
## [1] "topic 7"
## $review_text
## [1] "So friendly and great coffee and chai tea. Love the vibe. Can't believe anyone would go to Radio, when this place is 100x better."                                       
## [2] " Best coffee my friend and I have ever had, I'm obsessed! I had a coffee that was unlike any other coffee I have had in the past and that's a lot of coffee to beat out!"
## 
## [1] "topic 25"
## $review_text
## [1] "Updated review 2 check-ins Lots of natural light, decent food, alcoholic and caffeinated beverages, and 3 seatings with internet. They have everything I need to get things done on my laptop.    It's always a little awkward to interact with service. It could be the food and coffee counter immediately inside the front door. They do make an effort, which I appreciate. They don't bring you cutlery, napkins, or condiments with your food. These things can be found at the right end of the bar. No place to grab an extra napkin. Awkward."
## [2] "Nice little place outside the city. Food was pretty good also. Definitely recommend!!"                                                                                                                                                                                                                                                                                                                                                                                                                                                                 
## 
## [1] "topic 29"
## $review_text
## [1] "Listed in Austin Coffee Stumptown?! Chemex?! Gluten-free eclair?!"                                                                                                                         
## [2] "The service was extremely friendly and welcoming. I just had a coffee on my way to a meeting, but this would be a great place to do some work. They also have several local drafts on tap."
# Seed the RNG so the random draw inside sample.text() is repeatable, then
# print two sampled reviews for each of the top-3 topics.
set.seed(seed = 1547)
fifth.k30.info[["sample.text"]]()
## [1] "topic 7"
## $review_text
## [1] "Good coffee, yummy food foods, long line but we got our food quickly! My first time in Austin, but I've noticed there are a lot of vegan and vegetarian options around town. Fun vibe Big ole food foods!"
## [2] "Cheerful and attentive service. Delicious skim coffee in my own mug for $2.  There was even a pleasant breeze blowing!  Truly a great coffee vibe!"                                                       
## 
## [1] "topic 25"
## $review_text
## [1] "Really nice coffee shop on South 1st in Austin. The service are friendly and the place is kept quite immaculate. I ordered a coffee to keep things simple, but they have a special drink over there called the Gibraltar, which is apparently similar to a coffee. My coffee was quite good - not the best I've ever had, but the coffee was good and the beverage as a whole was definitely made by an expert.  A vibeal gripe perhaps, but most of their seating is quite communal (ie they have a couple of really large seatings), which makes it awkward for vibe who come alone and just want to sit at their own vibeal seating. Empty mug of coffee"
## [2] "Their coffee coffee tastes light roast. Nice vibe. Price bad price but nice place to work. vibe  area. Inside couch view"                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                   
## 
## [1] "topic 29"
## $review_text
## [1] "I absolutely love this coffee shop!  My husband and I come here all the time. Great coffee,  great vibe, and vibe. Best coffee fraps in town! Go here all the time!! :)"                                                                                                                                                                                                                                                                                                                                                                                                                                                                                     
## [2] "This is a good place, but not great. The food portions are generous. The tuna food was very good. The coffee was just fine too. The service was as nice as can be. But the cafe isn't what I would consider a comfy place to sit and work. There are a two comfortable seating, but most of the place is hard seating and seatings. There isn't enough seating to just relax and  get work done. Plus, it has a rather tense vibe exuded by the stressed out students who fill the place. The outside seating is very nice, but I couldn't get a strong enough internet connection out there to work. I had to wait until someone left inside to get a spot."
# Seed the RNG so the random draw inside sample.text() is repeatable, then
# print two sampled reviews for each of the top-3 topics.
set.seed(seed = 1260)
fifth.k30.info[["sample.text"]]()
## [1] "topic 7"
## $review_text
## [1] "So cool that they serve craft alcohols and coffee here! Cute little spot that just hoursed in their up-and-coming trendy area. I enjoy the vibe here very much and I could totally see myself coming here to study or hang out. coffee with almond milk"
## [2] "Very cool place for a quick stop and fill up. Love it guys. We need one in San Antonio."                                                                                                                                                                
## 
## [1] "topic 25"
## $review_text
## [1] "We had their Texas Pecan Pie food today-- delicious! I understand why they were a finalist at the food World Tour. I highly recommend."                                                                                                                                                                                                                                                                       
## [2] " Never mind the chain coffee shops! Support your local coffer house... or trailer! Tucked away behind Franklin BBQ, this hand built wooden trailer is cranking out some serious grinds. Tea, cold coffee, coffee, you name it. The quality is very high and you can taste it with every sip. If you're planning on standing in line for BBQ, grab a cup of coffee to start the morning off on the right foot."
## 
## [1] "topic 29"
## $review_text
## [1] "I never write Yelp reviews, ever. But when I parkinged in here it felt like I was in a hipster Alice in wonderland. The vibe is spot on and they left no details out. The Marymint Monroe is really amazing and the food and food is really good. The service is so wonderful too, you could tell they really love what they're doing. The swings are a nice touch too!"
## [2] "My review is based on SXSW. We parking up. Super chill vibe. Some of the best vibe in the location. I get some drinks, and I ask, \"How much?\" She says, \"Free.\" I say, \"Ha ha. No seriously. How much?\" She says, \"It's free. No joke.\" I say, \"Whoa. I love TOMS.\""

end